Data process

read xlsx file

# Install and load the readxl package if not already installed
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the Excel file and convert it to a dataframe
df <- read_excel("Hornets data.xlsx")
## Warning: Expecting logical in AN6909 / R6909C40: got
## 'CA_AB_Webb_1959_08_02_001'
# Filter the dataframe and remove rows with missing longitude or latitude
target_df <- df %>%
  filter(stateProvince %in% c('British Columbia', 'Canada - British Columbia (BC)')) %>%
  filter(!is.na(decimalLongitude) | !is.na(decimalLatitude))

Load BC_Covariates

if (!requireNamespace("maptools", quietly = TRUE)) {
  install.packages("maptools", repos = "https://packagemanager.posit.co/cran/2023-10-13")
}
## Please note that 'maptools' will be retired during October 2023,
## plan transition at your earliest convenience (see
## https://r-spatial.org/r/2023/05/15/evolution4.html and earlier blogs
## for guidance);some functionality will be moved to 'sp'.
##  Checking rgeos availability: FALSE
# Load required packages
library(sp)
library(spatstat)
## Loading required package: spatstat.data
## Loading required package: spatstat.geom
## spatstat.geom 3.2-9
## Loading required package: spatstat.random
## spatstat.random 3.2-3
## Loading required package: spatstat.explore
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## spatstat.explore 3.2-7
## Loading required package: spatstat.model
## Loading required package: rpart
## spatstat.model 3.2-11
## Loading required package: spatstat.linnet
## spatstat.linnet 3.1-5
## 
## spatstat 3.0-8 
## For an introduction to spatstat, type 'beginner'
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(maptools)
## 
## Attaching package: 'maptools'
## The following object is masked from 'package:sp':
## 
##     sp2Mondrian
# Import BC_Covariates
load("BC_Covariates.Rda")
window = DATA$Window

Plot distribution

library(ggplot2)

# Plot longitude and latitude coordinates
ggplot(target_df, aes(x = decimalLongitude, y = decimalLatitude)) +
  geom_point(color = "blue") +  # Add points
  labs(x = "Longitude", y = "Latitude",  # Label axes
       title = "Location Coordinates for Hornet within BC",  # Add title
       color = "Data Points") +  # Legend title
  scale_color_manual(values = "blue", name = "Data Points") +  # Legend color and title
  theme_minimal() +  # Apply minimal theme
  theme(
    panel.grid = element_blank(),  # Remove grid lines
    axis.line = element_line(color = "black")  # Color axis lines
  )

Projection process

library(sp)

# Define the projection string
proj_string <- "+proj=aea +lat_0=45 +lon_0=-126 +lat_1=50 +lat_2=58.5 +x_0=1000000 +y_0=0 +datum=NAD83 +units=m +no_defs"

# Create a SpatialPoints object from latitude and longitude
points <- SpatialPoints(coords = target_df[, c("decimalLongitude", "decimalLatitude")],
                        proj4string = CRS("+proj=longlat +datum=WGS84"))

# Transform the points to BC Albers projection
points_bc_albers <- spTransform(points, CRS(proj_string))

# Extract X and Y coordinates
X <- coordinates(points_bc_albers)[, 1]
Y <- coordinates(points_bc_albers)[, 2]

# Create a dataframe with X and Y columns
df_bc_albers <- data.frame(X = X, Y = Y)

# Now, 'df_bc_albers' contains the X and Y columns in the BC Albers projection

Convert to ppp object

# remove outside points
valid_points <- df_bc_albers[inside.owin(df_bc_albers$X, df_bc_albers$Y, as.owin(DATA$Window)), ]
hornet_ppp <- ppp(
  x = valid_points$X,  # X coordinates of valid points
  y = valid_points$Y,  # Y coordinates of valid points
  window = as.owin(DATA$Window)  # Observation window
)
## Warning: data contain duplicated points
plot(hornet_ppp)

Exploratory Data Analysis

Intensity calculation

From the plot, we can see most hornets tend to gather in lower BC, showing high intensity especially at the southwest corner. But for other part, especially for upper BC, there is no evidence of correlation in hornet location from eyes, indicating the distribution is inhomogeneous.

win_km <- rescale(Window(hornet_ppp), 1000, "km")

# Intensity in trees/km^2
npoints(hornet_ppp)/area(win_km)
## [1] 0.001371983

I dont think the estimated intensity trustworthy. Because from the plot we cannot tell the hornet location is homogeneous.

quadrat tets

Q <- quadratcount(hornet_ppp,
                  nx = 10,
                  ny = 10)
quadrat.test(Q)
## Warning: Some expected counts are small; chi^2 approximation may be inaccurate
## 
##  Chi-squared test of CSR using quadrat counts
## 
## data:  
## X2 = 28479, df = 63, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 
## Quadrats: 64 tiles (irregular windows)

The small p-value suggests that there is a significant deviation from homogeneity.

plot(Q, main = "Quadrat Plot with Hornet Points", xlab = "X Coordinate", ylab = "Y Coordinate")
points(hornet_ppp, pch = 20, col = "green")  # Overlay hornet points

intensity(Q)
## tile
##   Tile row 1, col 1   Tile row 1, col 2   Tile row 1, col 3   Tile row 1, col 4 
##        0.000000e+00        1.734704e-10        0.000000e+00        2.561876e-10 
##   Tile row 1, col 5   Tile row 1, col 6   Tile row 1, col 7   Tile row 2, col 2 
##        6.279768e-10        0.000000e+00        0.000000e+00        0.000000e+00 
##   Tile row 2, col 3   Tile row 2, col 4   Tile row 2, col 5   Tile row 2, col 6 
##        0.000000e+00        0.000000e+00        0.000000e+00        1.374831e-10 
##   Tile row 2, col 7   Tile row 3, col 2   Tile row 3, col 3   Tile row 3, col 4 
##        0.000000e+00        0.000000e+00        0.000000e+00        0.000000e+00 
##   Tile row 3, col 5   Tile row 3, col 6   Tile row 3, col 7   Tile row 4, col 3 
##        0.000000e+00        0.000000e+00        0.000000e+00        0.000000e+00 
##   Tile row 4, col 4   Tile row 4, col 5   Tile row 4, col 6   Tile row 4, col 7 
##        4.582771e-11        0.000000e+00        0.000000e+00        3.146336e-10 
##   Tile row 5, col 3   Tile row 5, col 4   Tile row 5, col 5   Tile row 5, col 6 
##        0.000000e+00        2.774770e-10        5.499325e-10        4.582771e-11 
##   Tile row 5, col 7   Tile row 6, col 2   Tile row 6, col 3   Tile row 6, col 4 
##        0.000000e+00        0.000000e+00        0.000000e+00        4.878190e-11 
##   Tile row 6, col 5   Tile row 6, col 6   Tile row 6, col 7   Tile row 6, col 8 
##        9.165542e-11        1.237348e-09        4.591895e-11        0.000000e+00 
##   Tile row 7, col 2   Tile row 7, col 3   Tile row 7, col 4   Tile row 7, col 5 
##        0.000000e+00        0.000000e+00        7.697947e-11        4.666335e-11 
##   Tile row 7, col 6   Tile row 7, col 7   Tile row 7, col 8   Tile row 7, col 9 
##        9.165542e-11        9.165542e-11        5.162269e-11        0.000000e+00 
##   Tile row 8, col 4   Tile row 8, col 5   Tile row 8, col 6   Tile row 8, col 7 
##        0.000000e+00        0.000000e+00        9.195917e-11        7.332433e-10 
##   Tile row 8, col 8   Tile row 8, col 9  Tile row 8, col 10   Tile row 9, col 4 
##        3.712044e-09        1.500022e-09        2.603879e-10        0.000000e+00 
##   Tile row 9, col 5   Tile row 9, col 6   Tile row 9, col 7   Tile row 9, col 8 
##        1.741078e-09        2.327311e-09        5.499325e-10        4.491115e-09 
##   Tile row 9, col 9  Tile row 9, col 10  Tile row 10, col 5  Tile row 10, col 6 
##        1.420659e-09        7.075868e-10        1.130684e-09        5.472572e-08 
##  Tile row 10, col 7  Tile row 10, col 8  Tile row 10, col 9 Tile row 10, col 10 
##        1.855185e-08        2.875371e-09        2.577527e-09        0.000000e+00
plot ( intensity ( Q , image = T))

lambda_u <- mean (Q)
R_Poisson <- rpois (n = length (Q) ,
lambda = lambda_u)
hist(Q)

hist(R_Poisson)

These data are clearly inhomogeneous.

Kernel estimation with likelihood cross validation bandwidth selection & Hotspot analysis

plot(density(hornet_ppp, sigma = bw.ppl),  # Likelihood Cross Validation Bandwidth Selection
     ribbon = F,
     main = "Hornet Kernel Estimation")

R <- bw.ppl(hornet_ppp)

#Calculate test statistic
LR <- scanLRTS(hornet_ppp, r = R)

#Plot the output 
plot(LR,main='Hotspot Analysis')
plot(window,add = TRUE)

Ripley’s \(K\)-function, test for a significant ( \(\alpha\) = 0.05) correlation between hornet locations

#Estimate a strictly positive density
lambda_hornet_pos <- density(hornet_ppp,
                          sigma=bw.ppl,
                          positive=TRUE)

#Simulation envelope (with points drawn from the estimated intensity)
E_hornet_inhom <- envelope(hornet_ppp,
                        Kinhom,
                        simulate = expression(rpoispp(lambda_hornet_pos)),
                        correction="border",
                        rank = 1,
                        nsim = 19,
                        fix.n = TRUE)
## Warning in envelope.ppp(hornet_ppp, Kinhom, simulate =
## expression(rpoispp(lambda_hornet_pos)), : fix.n and fix.marks were ignored,
## because 'simulate' was given
## Generating 19 simulations by evaluating expression  ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 
## 19.
## 
## Done.
# visualise the results
par(mfrow = c(1,2))
plot(E_hornet_inhom,
     main = "",
     lwd = 2)
# Zoom in on range where significant deviations appear
plot(E_hornet_inhom,
     xlim = c(0,30000),
     main = "",
     lwd = 2)

When corrected for inhomogeneity, significant clustering only appears to exist in and around 0-22000 meters. For longer distance, we cannot indicate a significant evidence of correlation in hornet distribution.

Pair correlation function (95% confidence interval)

pcf_hornet_inhom <- envelope(hornet_ppp,
                          pcfinhom,
                          simulate = expression(rpoispp(lambda_hornet_pos)),
                          rank = 1,
                          nsim = 19)
## Generating 19 simulations by evaluating expression  ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 
## 19.
## 
## Done.
par(mfrow = c(1,3))
plot(pcf_hornet_inhom)

# Zoom in on range where significant deviations appear
plot(pcf_hornet_inhom,
     xlim = c(0,10000),
     main = "",
     lwd = 2)

plot(pcf_hornet_inhom,
     xlim = c(15000,50000),
     main = "",
     lwd = 2)

There appear to be more hornet than expected by random chance between ∼ 0 - 6000 meters and less hornet between 15000 - 50000 meters. Except that, the locations of hornet appear not to exhibit any significant correlations.

Single Indicator Exploring

Distribution and Elevation

plot(DATA$Elevation, main = "Elevation Map of British Columbia", xlab = "Longitude", ylab = "Latitude")

# Plot park locations on top of elevation map
plot(hornet_ppp, add = TRUE,cex = 0.4)

# Define elevation classes based on quantiles
elevation_classes <- cut(DATA$Elevation, breaks = 5, labels = FALSE)

# Plot the elevation class image
image(elevation_classes, col = terrain.colors(5), main = "Elevation Classes")
plot(hornet_ppp, add = TRUE,cex = 0.4)

From plot, we could find that most hornets gather in Class 1 and 2.

# Median elevation within British Columbia
median_bc_elevation <- median(DATA$Elevation, na.rm = TRUE)

# Median elevation at park locations
median_hornet_elevation <- median(DATA$Elevation[hornet_ppp], na.rm = TRUE)

# Print results
print(paste("Median elevation within British Columbia:", median_bc_elevation))
## [1] "Median elevation within British Columbia: 1096.85723142938"
print(paste("Median elevation at hornet locations:", median_hornet_elevation))
## [1] "Median elevation at hornet locations: 74.4980033424321"
BC_elevation_density <- density(as.data.frame(DATA$Elevation, na.rm = TRUE)$value)

# Extract elevation values from DATA$Elevation using hornet_ppp
elevation_values <- as.vector(DATA$Elevation[hornet_ppp])

# Generate a kernel density estimate of the distribution of elevation values within hornet
hornet_elevation_density <- density(elevation_values, na.rm = TRUE)

plot(BC_elevation_density, col = "blue", main = "Kernel Density Estimate of Elevation in British Columbia", xlab = "Elevation",ylim = c(0, max(hornet_elevation_density$y)))

# Overlay the kernel density estimate of elevation values for park locations
lines(hornet_elevation_density, col = "red")

# Add legend
legend("topright", legend = c("British Columbia", "hornet Locations"), col = c("blue", "red"), lty = 1, cex = 0.8)

Forest and Distribution

plot(DATA$Forest, main = "Forest Map of British Columbia", xlab = "Longitude", ylab = "Latitude")

# Plot park locations on top of elevation map
plot(hornet_ppp, add = TRUE,cex = 0.4)

# Define elevation classes based on quantiles
forest_classes <- cut(DATA$Forest, breaks = 5, labels = FALSE)

# Plot the elevation class image
image(forest_classes, col = terrain.colors(5), main = "Forest Classes")
plot(hornet_ppp, add = TRUE,cex = 0.4)

From plot, we could find that most hornets gather in Class 4 and 5.

# Median forestdensity within British Columbia
median_bc_forest <- median(DATA$Forest, na.rm = TRUE)

# Median forest density at park locations
median_hornet_forest <- median(DATA$Forest[hornet_ppp], na.rm = TRUE)

# Print results
print(paste("Median Forest Density within British Columbia:", median_bc_forest))
## [1] "Median Forest Density within British Columbia: 50.1524677276611"
print(paste("Median Forest Density at hornet locations:", median_hornet_forest))
## [1] "Median Forest Density at hornet locations: 19.1723461151123"
BC_forest_density <- density(as.data.frame(DATA$Forest, na.rm = TRUE)$value)

# Extract forest values from DATA$Forest using hornet_ppp
forest_values <- as.vector(DATA$Forest[hornet_ppp])

# Generate a kernel density estimate of the distribution of forest values within hornet
hornet_forest_density <- density(forest_values, na.rm = TRUE)

plot(BC_forest_density, col = "blue", main = "Kernel Density Estimate of Forest in British Columbia", xlab = "Forest",ylim = c(0, max(BC_forest_density$y)))

# Overlay the kernel density estimate of forest values for hornet locations
lines(hornet_forest_density, col = "red")

# Add legend
legend("topright", legend = c("British Columbia", "hornet Locations"), col = c("blue", "red"), lty = 1, cex = 0.8)

### Distance to Water Map

plot(DATA$Dist_Water, main = "Distance to Water Map of British Columbia", xlab = "Longitude", ylab = "Latitude")

# Plot park locations on top of elevation map
plot(hornet_ppp, add = TRUE,cex = 0.4,col = 'white')

From the graph we can find that the distance is almost pretty low from water within BC, so i tend not to go deeper into the single indicator.

HFI

plot(DATA$HFI, main = "HFI Map of British Columbia", xlab = "Longitude", ylab = "Latitude")

# Plot park locations on top of elevation map
plot(hornet_ppp, add = TRUE,cex = 0.4, col='white')

From the graph we can find that HFI is almost pretty low within BC, so i tend not to go deeper into the single indicator.

Poisson Point Process for Indicators

Model Fitting

Estimate \(\rho\) for the locations of parks as a function of elevation/forest/water distance/HFI

elev <- DATA$Elevation
hornet_elev_rho <- rhohat(hornet_ppp, elev)
plot(hornet_elev_rho,xlim = c(0,max(elev)))

fors <- DATA$Forest
hornet_forest_rho <- rhohat(hornet_ppp, fors)
plot(hornet_forest_rho,xlim = c(0,max(fors)))

dist_water <- DATA$Dist_Water
hornet_dist_rho <- rhohat(hornet_ppp, dist_water)
plot(hornet_dist_rho,xlim = c(0,max(dist_water)))

hfi <- DATA$HFI

So we could find that the three indicators cannot be linear with hornet distribution, which lead to the formula below.

fit <- ppm(hornet_ppp ~ elev  + fors  + dist_water +  hfi , data = DATA)
## Warning: Values of the covariate 'hfi' were NA or undefined at 0.31% (13 out of
## 4230) of the quadrature points. Occurred while executing: ppm.ppp(Q =
## hornet_ppp, trend = ~elev + fors + dist_water + hfi,
fit
## Nonstationary Poisson process
## Fitted to point pattern dataset 'hornet_ppp'
## 
## Log intensity:  ~elev + fors + dist_water + hfi
## 
## Fitted trend coefficients:
##   (Intercept)          elev          fors    dist_water           hfi 
## -2.085013e+01 -2.136346e-03  4.101943e-04 -6.229289e-05  6.388678e+00 
## 
##                  Estimate         S.E.       CI95.lo       CI95.hi Ztest
## (Intercept) -2.085013e+01 1.394446e-01 -2.112344e+01 -2.057683e+01   ***
## elev        -2.136346e-03 1.018394e-04 -2.335948e-03 -1.936745e-03   ***
## fors         4.101943e-04 1.347051e-03 -2.229978e-03  3.050367e-03      
## dist_water  -6.229289e-05 1.424637e-05 -9.021527e-05 -3.437051e-05   ***
## hfi          6.388678e+00 1.516429e-01  6.091463e+00  6.685893e+00   ***
##                     Zval
## (Intercept) -149.5226796
## elev         -20.9775976
## fors           0.3045127
## dist_water    -4.3725438
## hfi           42.1297487
## Problem:
##  Values of the covariate 'hfi' were NA or undefined at 0.31% (13 out of 4230) 
## of the quadrature points

From the Ztest, we’d better remove forest

fit_1 <- ppm(hornet_ppp ~ elev +  dist_water + hfi, data = DATA)
## Warning: Values of the covariate 'hfi' were NA or undefined at 0.31% (13 out of
## 4230) of the quadrature points. Occurred while executing: ppm.ppp(Q =
## hornet_ppp, trend = ~elev + dist_water + hfi, data = list(
fit_1
## Nonstationary Poisson process
## Fitted to point pattern dataset 'hornet_ppp'
## 
## Log intensity:  ~elev + dist_water + hfi
## 
## Fitted trend coefficients:
##   (Intercept)          elev    dist_water           hfi 
## -2.082381e+01 -2.135456e-03 -6.280277e-05  6.366200e+00 
## 
##                  Estimate         S.E.       CI95.lo       CI95.hi Ztest
## (Intercept) -2.082381e+01 1.092606e-01 -2.103796e+01 -2.060967e+01   ***
## elev        -2.135456e-03 1.018300e-04 -2.335039e-03 -1.935872e-03   ***
## dist_water  -6.280277e-05 1.414419e-05 -9.052487e-05 -3.508066e-05   ***
## hfi          6.366200e+00 1.323565e-01  6.106786e+00  6.625614e+00   ***
##                    Zval
## (Intercept) -190.588484
## elev         -20.970799
## dist_water    -4.440181
## hfi           48.098883
## Problem:
##  Values of the covariate 'hfi' were NA or undefined at 0.31% (13 out of 4230) 
## of the quadrature points
fit_2 <- ppm(hornet_ppp ~ elev + I(elev^2)+ dist_water + hfi + I(hfi^2), data = DATA)
## Warning: Values of the covariate 'hfi' were NA or undefined at 0.31% (13 out of
## 4230) of the quadrature points. Occurred while executing: ppm.ppp(Q =
## hornet_ppp, trend = ~elev + I(elev^2) + dist_water +
fit_2
## Nonstationary Poisson process
## Fitted to point pattern dataset 'hornet_ppp'
## 
## Log intensity:  ~elev + I(elev^2) + dist_water + hfi + I(hfi^2)
## 
## Fitted trend coefficients:
##   (Intercept)          elev     I(elev^2)    dist_water           hfi 
## -2.120567e+01 -3.327566e-03  9.273141e-07 -5.705994e-05  1.034704e+01 
##      I(hfi^2) 
## -4.270298e+00 
## 
##                  Estimate         S.E.       CI95.lo       CI95.hi Ztest
## (Intercept) -2.120567e+01 1.406367e-01 -2.148132e+01 -2.093003e+01   ***
## elev        -3.327566e-03 2.021561e-04 -3.723784e-03 -2.931347e-03   ***
## I(elev^2)    9.273141e-07 1.358787e-07  6.609967e-07  1.193631e-06   ***
## dist_water  -5.705994e-05 1.387989e-05 -8.426403e-05 -2.985586e-05   ***
## hfi          1.034704e+01 5.855823e-01  9.199316e+00  1.149476e+01   ***
## I(hfi^2)    -4.270298e+00 5.922054e-01 -5.430999e+00 -3.109596e+00   ***
##                    Zval
## (Intercept) -150.783353
## elev         -16.460374
## I(elev^2)      6.824573
## dist_water    -4.110979
## hfi           17.669652
## I(hfi^2)      -7.210838
## Problem:
##  Values of the covariate 'hfi' were NA or undefined at 0.31% (13 out of 4230) 
## of the quadrature points

The coefficients are all statistically significant.

#Plot the model predictions
plot(fit_2,
     se = FALSE,
     superimpose = FALSE)

#Overlay the B. pendula locations
plot(hornet_ppp,
     pch = 16,
     cex = 0.4,
     cols = "white",
     add = TRUE)
plot(hornet_ppp,
     pch = 16,
     cex = 0.3,
     cols = "black",
     add = TRUE)

Model Selection

AIC(fit_2);AIC(fit_1) 
## [1] 47779.17
## [1] 47850.98
BIC(fit_2); BIC(fit_1)
## [1] 47810.2
## [1] 47871.66

We could find that AIC and BIC fall down when we remove one feature. So the extra complexity is not well supported by the data.

Model Validation

#Run the quadrat test
quadrat.test(fit_2, nx = 10, ny = 10)
## Warning: Some expected counts are small; chi^2 approximation may be inaccurate
## 
##  Chi-squared test of fitted Poisson model 'fit_2' using quadrat counts
## 
## data:  data from fit_2
## X2 = 557.63, df = 58, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 
## Quadrats: 64 tiles (irregular windows)

The small p value tells us that there’s a significant deviation from our model’s predictions. While this is useful for suggesting that our model has room for improvement, it provides us with no direction on how to do so (e.g., missing parameters, model mispecification (e.g., polynomial vs. linear), a lack of independence, non-stationarity, etc…).

#Calculate the residuals
res <- residuals(fit_2)
## Warning: Some infinite, NA or NaN increments were removed
na_indexes <- which(is.na(res$val))
# If you need to exclude NA values explicitly
if (length(na_indexes) > 0) {
  res <- res[-na_indexes]
  plot(res, cols='transparent')
} else {
  plot(res, cols='transparent')
}

We can see some pattern in the residual plot. We still need to check

#Calculate the partial residuals as a function of elevation
par_res_elev0 <- parres(fit_2, "elev")
## Warning: Some infinite, NA or NaN increments were removed
#Calculate the relative intensity as a function of for
par_res_hfi0 <- parres(fit_2, "hfi")
## Warning: Some infinite, NA or NaN increments were removed
## Warning: Values for 2 query points lying outside the pixel image domain were
## estimated by projection to the nearest pixel
par_res_water0 <- parres(fit_2,"dist_water")
## Warning: Some infinite, NA or NaN increments were removed
#Side by side plotting
par(mfrow = c(1,3))
plot(par_res_elev0,
     legend = FALSE,
     lwd = 2,
     main = "",
     xlab = "Elevation (m)")
plot(par_res_hfi0,
     legend = FALSE,
     lwd = 2,
     main = "",
     xlab = "Forest")
plot(par_res_water0,
     legend = FALSE,
     lwd = 2,
     main = "",
     xlab = "Distance to Water")

The plot shows that each feature could explain the intensity well.

library(splines)

#Fit the PPP model
fit_smooth <- ppm(hornet_ppp ~ bs(elev,4) + bs(hfi, 7) + bs(dist_water,2), data = DATA, use.gam = TRUE)
## Warning: Values of the covariate 'hfi' were NA or undefined at 0.31% (13 out of
## 4230) of the quadrature points. Occurred while executing: ppm.ppp(Q =
## hornet_ppp, trend = ~bs(elev, 4) + bs(hfi, 7) + bs(dist_water,
## Warning in bs(dist_water, 2): 'df' was too small; have used 3

## Warning in bs(dist_water, 2): 'df' was too small; have used 3
fit_smooth
## Nonstationary Poisson process
## Fitted to point pattern dataset 'hornet_ppp'
## 
## Log intensity:  ~bs(elev, 4) + bs(hfi, 7) + bs(dist_water, 2)
## 
## Fitted trend coefficients:
##        (Intercept)       bs(elev, 4)1       bs(elev, 4)2       bs(elev, 4)3 
##        -22.6986660         -2.0545674         -2.4037646         -4.0002691 
##       bs(elev, 4)4        bs(hfi, 7)1        bs(hfi, 7)2        bs(hfi, 7)3 
##        -11.3093207          0.9351316          2.3075726          2.4558059 
##        bs(hfi, 7)4        bs(hfi, 7)5        bs(hfi, 7)6        bs(hfi, 7)7 
##          5.4655838          6.7798595          6.9551590          9.2755929 
## bs(dist_water, 2)1 bs(dist_water, 2)2 bs(dist_water, 2)3 
##          0.7654902         -3.0664677         -2.3735535 
## 
## For standard errors, type coef(summary(x))
## Problem:
##  Values of the covariate 'hfi' were NA or undefined at 0.31% (13 out of 4230) 
## of the quadrature points
coef(summary(fit_smooth))
## Warning: model was fitted by gam(); asymptotic variance calculation ignores
## this
##                       Estimate      S.E.      CI95.lo     CI95.hi Ztest
## (Intercept)        -22.6986660 0.7302310 -24.12989251 -21.2674395   ***
## bs(elev, 4)1        -2.0545674 0.2503921  -2.54532701  -1.5638079   ***
## bs(elev, 4)2        -2.4037646 0.5240757  -3.43093409  -1.3765952   ***
## bs(elev, 4)3        -4.0002691 1.2726075  -6.49453398  -1.5060043    **
## bs(elev, 4)4       -11.3093207 5.6102003 -22.30511111  -0.3135302     *
## bs(hfi, 7)1          0.9351316 0.9798759  -0.98538988   2.8556531      
## bs(hfi, 7)2          2.3075726 0.8685392   0.60526719   4.0098781    **
## bs(hfi, 7)3          2.4558059 0.7503720   0.98510380   3.9265081    **
## bs(hfi, 7)4          5.4655838 0.7615734   3.97292739   6.9582403   ***
## bs(hfi, 7)5          6.7798595 0.7676102   5.27537107   8.2843479   ***
## bs(hfi, 7)6          6.9551590 0.7374069   5.50986799   8.4004500   ***
## bs(hfi, 7)7          9.2755929 0.7572240   7.79146117  10.7597246   ***
## bs(dist_water, 2)1   0.7654902 0.3477852   0.08384376   1.4471366     *
## bs(dist_water, 2)2  -3.0664677 1.0435042  -5.11169839  -1.0212370    **
## bs(dist_water, 2)3  -2.3735535 2.0392890  -6.37048648   1.6233794      
##                           Zval
## (Intercept)        -31.0842253
## bs(elev, 4)1        -8.2053990
## bs(elev, 4)2        -4.5866746
## bs(elev, 4)3        -3.1433644
## bs(elev, 4)4        -2.0158497
## bs(hfi, 7)1          0.9543368
## bs(hfi, 7)2          2.6568435
## bs(hfi, 7)3          3.2727845
## bs(hfi, 7)4          7.1767000
## bs(hfi, 7)5          8.8324246
## bs(hfi, 7)6          9.4319143
## bs(hfi, 7)7         12.2494706
## bs(dist_water, 2)1   2.2010432
## bs(dist_water, 2)2  -2.9386251
## bs(dist_water, 2)3  -1.1639123
#Calculate the partial residuals as a function of elevation
par_res_elev <- parres(fit_smooth, "elev")
## Warning: Some infinite, NA or NaN increments were removed
## Warning in bs(hfi, 7): all interior knots match left boundary knot
## Warning in bs(hfi, 7): all interior knots match right boundary knot
## Warning in bs(dist_water, 2): 'df' was too small; have used 3
#Calculate the relative intensity as a function of for
par_res_hfi <- parres(fit_smooth, "hfi")
## Warning: Some infinite, NA or NaN increments were removed
## Warning: Values for 2 query points lying outside the pixel image domain were
## estimated by projection to the nearest pixel
## Warning in bs(elev, 4): all interior knots match left boundary knot
## Warning in bs(elev, 4): all interior knots match right boundary knot
## Warning in bs(dist_water, 2): 'df' was too small; have used 3
par_res_water <- parres(fit_smooth,"dist_water")
## Warning: Some infinite, NA or NaN increments were removed
## Warning in bs(elev, 4): all interior knots match left boundary knot
## Warning in bs(elev, 4): all interior knots match right boundary knot
## Warning in bs(hfi, 7): all interior knots match left boundary knot
## Warning in bs(hfi, 7): all interior knots match right boundary knot
## Warning in bs(dist_water, 2): 'df' was too small; have used 3
#Side by side plotting
par(mfrow = c(1,3))
plot(par_res_elev,
     legend = FALSE,
     lwd = 2,
     main = "",
     xlab = "Elevation (m)")
plot(par_res_hfi,
     legend = FALSE,
     lwd = 2,
     main = "",
     xlab = "HFI")
plot(par_res_water,
     legend = FALSE,
     lwd = 2,
     main = "",
     xlab = "Distance to Water")

It is not good, but better than previous one.

AIC(fit_2);AIC(fit_smooth)
## [1] 47779.17
## [1] 47645
BIC(fit_2);BIC(fit_smooth)
## [1] 47810.2
## [1] 47722.56
anova(fit_2, fit_smooth, test = "LRT")
## Warning: Models were re-fitted with use.gam=TRUE
## Warning: Values of the covariate 'hfi' were NA or undefined at 0.31% (13 out of
## 4230) of the quadrature points. Occurred while executing: ppm.ppp(Q =
## hornet_ppp, trend = ~elev + I(elev^2) + dist_water +
## Warning: Values of the covariate 'hfi' were NA or undefined at 0.31% (13 out of
## 4230) of the quadrature points. Occurred while executing: ppm.ppp(Q =
## hornet_ppp, trend = ~bs(elev, 4) + bs(hfi, 7) + bs(dist_water,
## Warning in bs(dist_water, 2): 'df' was too small; have used 3

## Warning in bs(dist_water, 2): 'df' was too small; have used 3
## Analysis of Deviance Table
## 
## Model 1: ~elev + I(elev^2) + dist_water + hfi + I(hfi^2)      Poisson
## Model 2: ~bs(elev, 4) + bs(hfi, 7) + bs(dist_water, 2)    Poisson
##   Npar Df Deviance  Pr(>Chi)    
## 1    6                          
## 2   15  9   152.18 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The ANOVA results indicate that Model 2 has significantly better fit to the data than Model 1, as evidenced by the large difference in deviance (516.81) and the highly significant p-value (< 2.2e-16), which is well below any conventional significance level. All lines of evidence point towards these more complex models being a better fit to the data. The final model is too long to write down, but we can visualise the predictions just as before.

#Calculate the residuals
res1 <- residuals(fit_smooth)
## Warning: Some infinite, NA or NaN increments were removed
na_indexes1 <- which(is.na(res1$val))
# If you need to exclude NA values explicitly
if (length(na_indexes1) > 0) {
  res1 <- res[-na_indexes1]
  plot(res1, cols='transparent')
} else {
  plot(res1, cols='transparent')
}

#Plot the model predictions
plot(fit_smooth,
     se = FALSE,
     superimpose = FALSE,
     main = "Estimated Hornet intensity")

plot(hornet_ppp,
     pch = 16,
     cex = 0.6,
     cols = "white",
     add = TRUE)
plot(hornet_ppp,
     pch = 16,
     cex = 0.5,
     cols = "black",
     add = TRUE)